mutate(area = "PR") %>%
mutate(type = "Intra")
B_PR_RT_inter <- read_csv(paste0(baseDir, "B_PR_RT_inter/", "B_PR_RT_inter.dists.csv"), col_names = TRUE) %>%
mutate(sub = "B") %>%
mutate(area = "PR_RT") %>%
mutate(type = "Inter")
B_PR_RT_intra <- read_csv(paste0(baseDir, "B_PR_RT_intra/", "B_PR_RT_intra.dists.csv"), col_names = TRUE) %>%
select(-sorted) %>%
mutate(sub = "B") %>%
mutate(area = "PR_RT") %>%
mutate(type = "Intra")
C_PR_inter <- read_csv(paste0(baseDir,"C_PR_inter/", "C_PR_inter.dists.csv"), col_names = TRUE) %>%
mutate(sub = "C") %>%
mutate(area = "PR") %>%
mutate(type = "Inter")
C_PR_intra <- read_csv(paste0(baseDir, "C_PR_intra/", "C_PR_intra.dists.csv"), col_names = TRUE) %>%
select(-sorted) %>%
mutate(sub = "C") %>%
mutate(area = "PR") %>%
mutate(type = "Intra")
C_PR_RT_inter <- read_csv(paste0(baseDir, "C_PR_RT_inter/", "C_PR_RT_inter.dists.csv"), col_names = TRUE) %>%
mutate(sub = "C") %>%
mutate(area = "PR_RT") %>%
mutate(type = "Inter")
C_PR_RT_intra <- read_csv(paste0(baseDir, "C_PR_RT_intra/", "C_PR_RT_intra.dists.csv"), col_names = TRUE) %>%
select(-sorted) %>%
mutate(sub = "C") %>%
mutate(area = "PR_RT") %>%
mutate(type = "Intra")
combined <- rbind(B_PR_inter,
B_PR_intra,
B_PR_RT_inter,
B_PR_RT_intra,
C_PR_inter,
C_PR_intra,
C_PR_RT_inter,
C_PR_RT_intra)
View(combined)
write_csv(combined, "combined_dists_redo.csv")
knitr::opts_chunk$set(echo = TRUE,
fig.width = 10,
fig.height = 6)
library(readr)
library(ggplot2)
library(reshape2)
library(dplyr)
library(magrittr)
pt.dists <- read_csv("combined_dists_redo.csv", col_names = TRUE) %>%
mutate(
sub = factor(sub),
type = factor(type),
area = factor(area)
)
head(pt.dists)
pt.dists %>%
ggplot(aes(x= distance, fill = type)) +
geom_histogram(binwidth = 0.001) +
facet_grid(type ~ sub + area)
pt.dists %>%
ggplot(aes(x = type, y = distance,  fill = type)) +
geom_violin() +
coord_flip() +
facet_grid(sub ~ area)
#create a empty dataframe for combining the predictions
predictions <- data.frame(distance = double(),
pHat = double())
for (Subtype in c('B','C')){
for (Target in c('PR', 'PR_RT')){
new_model = (paste(Subtype,Target,"model", sep = "_"))
print(new_model)
pt.sample <- pt.dists %>%
filter(sub == Subtype,
area == Target) %>%
sample_n(1e5)
print(summary(pt.sample$sub))
print(summary(pt.sample$type))
model <- glm(type ~ distance, data = pt.sample, family = binomial)
assign(new_model, model)
print(model)
newdata <-  data.frame(distance = seq(0, 0.15, by = 0.001))
pred <- predict(model, newdata, type = "response")
tmp <- cbind(newdata, pred) %>%
mutate(Sub = Subtype) %>%
mutate(area = Target)
predictions <- rbind(predictions, tmp)
}
}
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups)) +
geom_line() +
ggtitle("Model results") +
ylab("Predicted probability")
View(pt.dists)
#create a empty dataframe for combining the predictions
predictions <- data.frame(distance = double(),
pHat = double())
for (Subtype in c('B','C')){
for (Target in c('PR', 'PR_RT')){
new_model = (paste(Subtype,Target,"model", sep = "_"))
print(new_model)
pt.sample <- pt.dists %>%
filter(sub == Subtype,
area == Target) #%>%
#sample_n(1e5)
print(summary(pt.sample$sub))
print(summary(pt.sample$type))
model <- glm(type ~ distance, data = pt.sample, family = binomial)
assign(new_model, model)
print(model)
newdata <-  data.frame(distance = seq(0, 0.15, by = 0.001))
pred <- predict(model, newdata, type = "response")
tmp <- cbind(newdata, pred) %>%
mutate(Sub = Subtype) %>%
mutate(area = Target)
predictions <- rbind(predictions, tmp)
}
}
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups)) +
geom_line() +
ggtitle("Model results") +
ylab("Predicted probability")
pt.intra.dists <- pt.dists %>%
filter(type == "Intra") %>%
group_by()
View(pt.intra.dists)
View(pt.intra.dists)
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE)
head(B_PR_intra_counts)
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = B_PR_intra)
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = B_PR_intra)
```{r ptIntra}
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "B_PR_intra")
head(B_PR_intra_counts)
B_PR_RT_intra_counts <- read_csv("B_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Targer = "B_PR_RT_intra")
C_PR_intra_counts <- read_csv("C_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_intra")
C_PR_RT_intra_counts <- read_csv("C_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_RT_intra")
intra_clusters <- rbind(B_PR_intra_counts,
B_PR_RT_intra_counts,
C_PR_intra_counts,
C_PR_RT_intra_counts)
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "B_PR_intra")
B_PR_RT_intra_counts <- read_csv("B_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Targer = "B_PR_RT_intra")
C_PR_intra_counts <- read_csv("C_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_intra")
C_PR_RT_intra_counts <- read_csv("C_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_RT_intra")
intra_clusters <- rbind(B_PR_intra_counts,
B_PR_RT_intra_counts,
C_PR_intra_counts,
C_PR_RT_intra_counts)
View(B_PR_intra_counts)
View(B_PR_RT_intra_counts)
B_PR_intra_counts <- read_csv("B_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "B_PR_intra")
B_PR_RT_intra_counts <- read_csv("B_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "B_PR_RT_intra")
C_PR_intra_counts <- read_csv("C_PR_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_intra")
C_PR_RT_intra_counts <- read_csv("C_PR_RT_intra.counts", col_names = FALSE) %>%
mutate(Sub_Target = "C_PR_RT_intra")
intra_clusters <- rbind(B_PR_intra_counts,
B_PR_RT_intra_counts,
C_PR_intra_counts,
C_PR_RT_intra_counts)
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target))+
geom_point()
clusterPlot
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target, color = "green", alpha = 0.5))+
geom_point()
clusterPlot
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target), color = "green", alpha = 0.5)+
geom_point()
clusterPlot
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target))+
geom_point(color = "green", alpha = 0.5)
clusterPlot
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target))+
geom_point(color = "magenta", alpha = 0.5)
clusterPlot
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target))+
geom_point(color = "magenta", alpha = 0.5, shape = 1)
clusterPlot
View(predictions)
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups)) +
geom_line(linetype = area) +
ggtitle("Model results") +
ylab("Predicted probability")
View(predictions)
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups)) +
geom_line(linetype = predictions$area) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups)) +
geom_line(linetype = area) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups, linetype = area)) +
geom_line() +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, colour = groups )) +
geom_line(aes(linetype = area)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, group = groups )) +
geom_line(aes(linetype = area)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, group = Sub )) +
geom_line(aes(linetype = area)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred, color = Sub )) +
geom_line(aes(linetype = area)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = area, color = Sub)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = groups, color = groups)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = groups, color = Sub)) +
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = area, color = Sub)) +
ggtitle("Model results") +
ylab("Predicted probability")
clusterPlot <- ggplot(intra_clusters, aes(x = X1, y = Sub_Target))+
geom_point(color = "magenta", alpha = 0.5, shape = 1)+
xlab("Number of sequences per patient")
clusterPlot
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = area, color = Sub)) +
xlab(breaks = 10)+
ggtitle("Model results") +
ylab("Predicted probability")
predictions %>%
mutate(groups = paste(Sub, area, sep = "_")) %>%
ggplot(aes(x = distance, y = pred)) +
geom_line(aes(linetype = area, color = Sub)) +
ggtitle("Model results") +
ylab("Predicted probability")
setwd("/home/armand/Desktop/phylophile_writeup/benchmarks/selenium_time_analysis/5_best")
library(chron)
library(ggplot2)
library(msaR)
library(seqinr)
library(ape)
infile <- read.csv("combined.csv", header = FALSE, stringsAsFactors = FALSE)
aln_metrix <- read.csv("aln_metrix.txt",header = FALSE, sep = ",")
colnames(infile) <- c("submitted", "blastResults", "inputStart",
"blastStart", "mafftStart", "trimalStart",
"fasttreeStart", "renderStart")
colnames(aln_metrix) <- c('submitted', 'num_seq', 'aln_length')
infile <- cbind(infile,aln_metrix)
infile$num_seqXaln_length <- infile$num_seq * infile$aln_length
infile$inputStart <- times(infile$inputStart,format="h:m:s")
infile$blastStart <- times(infile$blastStart,format="h:m:s")
infile$mafftStart <- times(infile$mafftStart,format="h:m:s")
infile$trimalStart <- times(infile$trimalStart,format="h:m:s")
infile$fasttreeStart <- times(infile$fasttreeStart,format="h:m:s")
infile$renderStart <- times(infile$renderStart,format="h:m:s")
infile$blastTime <- as.numeric(infile$mafftStart - infile$blastStart)*24*60*60
infile$mafftTime <- as.numeric(infile$trimalStart - infile$mafftStart)*24*60*60
infile$trimalTime <- as.numeric(infile$fasttreeStart - infile$trimalStart)*24*60*60
infile$fasttreeTime <- as.numeric(infile$renderStart - infile$fasttreeStart)*24*60*60
#some basic sacatter plots for exploration
plot(infile$submitted, infile$blastTime, main = "BLASTn time vs. input",
xlab = "no. input", ylab = "BLASTn time (s)",  pch=19)
abline(lm(infile$blastTime~infile$submitted), col="red")
#As can be seen from the graph above the amount of time it takes blast to
#complete is linearly proportional to the number of input sequences.
plot(infile$submitted, infile$blastResults, main = "Number of blast results (best 5) vs. input",
xlab = "no. input", ylab = "no blast results retrieved", pch = 19)
abline(lm(infile$blastResults ~ infile$submitted))
lm(infile$blastResults ~ infile$submitted)
#The graph above illustrates the linear relationship of number of samples submitted
# vs. the number of blast results.  Allthough, 5 best hits were requested some sequences
# will retrieve the same results.  These 'duplicates' must be removed based on fasta
# headers/names.  With the dataset of Kim, the slope is 4.5, thus for 2 input, 9 will be
# retrieved by blast and the rest of the analysis will be done on those 9.  I think
# this slope will depend on the size of your data and the diversity thereof.  Then also
# largely it will depend an the diversity of your input.
attach(infile)
fit <- nls(mafftTime~(b * blastResults**a), start = list(a=1,b=0.1))
plot(infile$blastResults, infile$mafftTime, main = "MAFFT time vs. BLASTn results (best 5)",
xlab = "no. BLASTn results" , ylab = "MAFFT time (s)",  pch=19)
lines(blastResults, predict(fit))
cor(mafftTime,predict(fit))
summary(fit)
coef(fit)
#Thus fit equation is:  y = b x X^a,   where y = MAFFT time and x = blastResults
#  y = 0.02797636 + X^2.10633222
#As one would expect, the multiple sequence alignment does not scale linearly
# and is the most computational expensive step in the analysis.  Here we can see
# that to analyse an input of 50, we retrieve additional sequences to a total of
# 225.  The multiple sequence alingment of these 225 sequences took about 42 minutes.
# From the non linear regression:
# As example, submitting 24 samples will after blast, result in 108 samples for multiple
# sequence alingment, 0.028 * 108^2 = 327 seconds (5:27).  It will be interesting to
# see how mega will do with this.
# Below the residual plots
plot(blastResults, resid(fit))
abline(h = 0, col = 'red', lty=3)
plot(density(resid(fit)))
abline(v = 0, col = 'red', lty=3)
# QQ plots
qqnorm(resid(fit))
qqline(resid(fit))
# From the 'goodness of fit' plots above, we can see that we do have a resonable fit
# The risidual plot does scater around 0 residual, but there is a desernable pattern
# This is also shown in the residual density plot.  Notice, in the scatter plot of
# the residuals, the outlier at the far right, corresponts to the bump at the far right
# of the density plot.  The same is true for the left hand bump on the density plot and
# the lower than 0 deveation on the residual plot.
fit2 <- nls(trimalTime~(b * blastResults**a), start = list(a=1,b=0.1))
cor(trimalTime,predict(fit2))
summary(fit2)
coef(fit2)
plot(infile$blastResults, infile$trimalTime, main = "trimAll time vs. BLASTn results (best 5)",
xlab = "no. BLASTn results" , ylab = "trimAll time (s)",  pch=19)
lines(blastResults, predict(fit2))
resid(fit2)
plot(blastResults, resid(fit2))
abline(h = 0, col = 'red', lty=3)
plot(density(resid(fit2)))
abline(v = 0, col = 'red', lty=3)
qqnorm(resid(fit2))
qqline(resid(fit2))
plot(infile$blastResults, infile$fasttreeTime, main = "FastTree time vs. BLASTn results (best 5)",
xlab = "no. BLASTn results" , ylab = "FastTree time (s)",  pch=19)
plot(infile$num_seqXaln_length, infile$fasttreeTime, main = "FastTree time vs. num_seqsXlen_aln ",
xlab = "num_seqsXlen_aln" , ylab = "FastTree time (s)",  pch=19)
#fasttree time vs BLASTn results shows some outliers from the linear trend
#this migh be due to less complexity in the sequences, probably shorter
pft <- ggplot(infile, aes(x = blastResults, y = fasttreeTime)) +
geom_point() +
geom_text(infile, mapping = aes(x = blastResults,
y = fasttreeTime, label = submitted), vjust = -1) +
theme(panel.background = element_rect(fill = "white")) +
geom_smooth(method = "lm", se = TRUE)
pft
pft2 <- ggplot(infile, aes(x = num_seqXaln_length, y = fasttreeTime)) +
geom_point() +
geom_text(infile, mapping = aes(x = num_seqXaln_length,
y = fasttreeTime, label = submitted), vjust = -1) +
theme(panel.background = element_rect(fill = "white")) +
geom_smooth(method = "lm", se = TRUE)
pft2
setwd("/home/armand/Desktop/phylophile_writeup/benchmarks/selenium_time_analysis/5_best")
setwd("/home/armand/Desktop/phylophile_writeup/benchmarks/lanl_POL_CDS/selenium")
infile <- read.csv("timeFile.csv", header = FALSE, stringsAsFactors = FALSE)
library(reshape2)
library(ggplot2)
library(gridExtra)
colnames(infile) <- c("N_in", "process", "time")
#connvert N_in to actual values
infile$N_in <- as.integer(substr(infile$N_in, 19, length(infile$N_in)-14))
##blast hits vs queries
#create a new df with the N_in and N_blst_hits
blastHits <- infile[infile$process=="blastHits",][,-2]
colnames(blastHits) <- c("N_in", "N_blst")
#creat on theme for ggplot to use thoughout
phyloPiTheme <- theme_bw(base_size = 14) +
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
panel.background = element_blank())
#plot and fit N_blst against N_in, force through 0
fit <-  lm(blastHits$N_blst ~ blastHits$N_in-1)
coefficients(fit)
summary(fit)
nBlstPlot <- ggplot(blastHits, aes(x = N_in, y = N_blst)) +
geom_point(shape = 1) +
geom_smooth(method=lm, se = FALSE, colour='black',formula=y~x-1, size = 0.25) +
labs(x="Number of input sequences", y="Number of sequences retrieved by BLAST") +
annotate("text", x=41, y=72, label = "y == 4.628 * x", parse=T)+
annotate("text", x=40, y=60, label = "R^2 == 0.998", parse=T) +
#annotate("label", x = 0, y = 250)
phyloPiTheme
nBlstPlot
ggsave(file="blast hits vs queries.svg", plot=nBlstPlot)
#parse the dataframe for blast
blast <- infile[infile$process=="blast",][-2]
blast <- cbind(blastHits, blast)[-3]
fitBlastT <-  lm(blast$time ~ blast$N_in-1)
coefficients(fitBlastT)
summary(fitBlastT)
blastT <- ggplot(blast, aes(x = N_in, y = time)) +
geom_point(shape = 1) +
geom_smooth(method=lm, se = FALSE, colour='black',formula=y~x-1, size = 0.25) +
labs(x="Number of input sequences", y="time (s)") +
annotate("text", x=40, y=200, label = "y == 11.02 * x", parse=T)+
annotate("text", x=40, y=170, label = "R^2 == 1", parse=T) +
phyloPiTheme
blastT
ggsave(file="blast time.svg", plot = blastT)
#parse the dataframe for mafft
mafft <- infile[infile$process=="mafftTime",][-2]
mafft <- cbind(blastHits, mafft)[-3]
#plot and fit time against N-blst, mafft
t <- mafft$time
N <- mafft$N_blst
fit <- nls(t~b * N**a, start = list(a=2,b=0.1))
cor(t,predict(fit))
summary(fit)
fit2 <- nls(t~b * N**2, start = list(b=0.1))
summary(fit2)
anova(fit, fit2)
mafftT <- ggplot(mafft, aes(x=N_blst, y=time))+
geom_point(shape = 1) +
geom_smooth(method="nls",
formula = y ~ b * x**a,
method.args=list(start = c(a = 2, b = 0.1)),
se = FALSE, colour='black', size = 0.25) +
labs(x="Number of sequences in alignment", y="time (s)") +
annotate("text", x=190, y=1800, label = "y == 0.153 * x^1.92", parse=T)+
phyloPiTheme
mafftT
ggsave(file="mafftTime.svg", plot=mafftT)
y <- mafft$time
x <- mafft$N_blst
fitMafft <- nls(y ~ b * x**a, start = list(a = 2, b = 0.1))
plot(x=mafft$N_blst, y=mafft$time)
lines(x, predict(fitMafft))
plot(x, resid(fitMafft))
hist(resid(fitMafft), breaks = 25)
abline(h=0)
qqnorm(resid(fitMafft))
qqline(resid(fitMafft))
#plot for fastTree
#for number of queries in input
fastTreeNq <- infile[infile$process == "fasttreeTime",][-2]
#add blastHits here
fastTreeWithBlstHits <- cbind(fastTreeNq,blastHits)[-3]
##an of time vs input query
fitFastTreeNq <-  lm(fastTreeNq$time ~ blast$N_in-1)
coefficients(fitFastTreeNq)
summary(fitFastTreeNq)
fastTreeNqP <- ggplot(fastTreeNq, aes(x = N_in, y = time)) +
geom_point(shape = 1) +
geom_smooth(method=lm, se = FALSE, colour='black',formula=y~x-1, size = 0.25) +
labs(x="Number of query sequences", y="time (s)") +
annotate("text", x=40, y=70, label = "y == 3.02 * x", parse=T)+
annotate("text", x=40, y=60, label = "R^2 == .994", parse=T)+
phyloPiTheme
fastTreeNqP
ggsave(file="fastTree query sequences.svg", plot = fastTreeNqP)
##an of time vs blast results, thus number of samples in analysis
fitFastTreeWithBlstHits <-  lm(fastTreeWithBlstHits$time ~ fastTreeWithBlstHits$N_blst -1)
coefficients(fitFastTreeWithBlstHits)
summary(fitFastTreeWithBlstHits)
fastTreeWithBlastHitsP <- ggplot(fastTreeWithBlstHits, aes(x = N_blst, y = time)) +
geom_point(shape = 1) +
geom_smooth(method=lm, se = FALSE, colour='black',formula=y~x-1, size = 0.25) +
labs(x="Number of sequences in alingment", y="time (s)") +
annotate("text", x=170, y=60, label = "y == 0.652 * x", parse=T)+
annotate("text", x=170, y=50, label = "R^2 == .993", parse=T)+
phyloPiTheme
fastTreeWithBlastHitsP
ggsave(file="fastTree total sequences.svg", plot = fastTreeWithBlastHitsP)
plots = list(nBlstPlot + ggtitle("(a)"),
blastT + ggtitle("(b)"),
mafftT + ggtitle("(c)"),
fastTreeWithBlastHitsP + ggtitle("(d)"))
figure <- grid.arrange(grobs= lapply(plots, "+", theme(plot.margin=margin(10,25,10,25))))
figure
ggsave(file = "benchFig.svg",plot = figure, width = 7.66, height = 10)
ggsave(file = "benchFig.png",plot = figure, width = 7.66, height = 10)
